##
## Spearman's rank correlation rho
##
## data: valid$age and valid$hospdays
## S = 56530000, p-value = 2.296e-10
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.227336
There is a positive correlation between age and duration of hospitalization.
##
## Spearman's rank correlation rho
##
## data: valid$age and valid$hospdays
## S = 59538000, p-value = 1.403e-07
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1894313
Results show a positive correlation between duration of hosp and age.
H1N1
H3N2
H1N1
No clear effect of season.
H3N2
No clear effect of season –> Try models that don’t include season and country as blocking vars
We know the relationship between age and probability of infection should be non-linear. Use cross-validation to verify, and to choose the number of degrees of freedom to include in a spline
Plot the overall test error rate vs. df
No clear preference. Use 4 df?
We know the relationship between age and probability of infection should be non-linear. Use cross-validation to verify, and to choose the number of degrees of freedom to include in a spline
Plot the overall test error rate vs. df
Conclusion: Clear preference for linear fit for H3N2
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
##
## Argentina Australia Austria Belgium Chile China Denmark
## 50 69 3 11 1 15 67
## Germany Greece Norway Peru Poland Singapore Spain
## 41 81 7 15 6 6 40
## Thailand UK USA
## 64 131 114
## [1] 721
Treat vaccination, antivial use, underlying symptoms, country and season as blocking variables with fixed effects
library(splines)
## H1N1 fit to medical history only (no effect of country, season, age or imprinting)
fit00 = glm(hospdays ~ anyvac + anyav + anydx, data = train.H1N1)
summary(fit00)
##
## Call:
## glm(formula = hospdays ~ anyvac + anyav + anydx, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -9.111 -5.980 -3.703 0.297 90.471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.121 1.268 3.250 0.00122 **
## anyvac1 -2.131 1.036 -2.056 0.04016 *
## anyav1 1.582 1.195 1.324 0.18590
## anydx1 3.407 1.032 3.300 0.00102 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 128.5975)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 82945 on 645 degrees of freedom
## AIC: 4999.8
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to country, season and medical history only (no effect of age or imprinting)
fit0 = glm(hospdays ~ anyvac + anyav + anydx + country + season, data = train.H1N1)
summary(fit0)
##
## Call:
## glm(formula = hospdays ~ anyvac + anyav + anydx + country + season,
## data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -15.839 -5.600 -2.717 0.785 91.142
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2459 3.2847 1.597 0.11076
## anyvac1 -1.6792 1.0896 -1.541 0.12379
## anyav1 1.9835 1.2191 1.627 0.10425
## anydx1 2.3804 1.0585 2.249 0.02487 *
## countryAustralia -2.6710 2.6975 -0.990 0.32248
## countryBelgium 1.9673 4.6162 0.426 0.67013
## countryDenmark -4.9104 3.1565 -1.556 0.12030
## countryGermany 5.1522 3.3357 1.545 0.12296
## countryGreece -2.7547 3.0787 -0.895 0.37126
## countryOther 1.6503 3.0943 0.533 0.59400
## countrySpain -1.1243 3.3652 -0.334 0.73841
## countryThailand -4.1520 2.6743 -1.553 0.12104
## countryUK -2.3741 2.9263 -0.811 0.41750
## countryUSA -1.8117 2.9810 -0.608 0.54358
## seasonNH.10.11 5.0602 1.6700 3.030 0.00255 **
## seasonNH.11.12 -3.1345 4.5065 -0.696 0.48698
## seasonNH.12.13 -4.1199 3.3786 -1.219 0.22316
## seasonNH.13.14 1.0435 1.8370 0.568 0.57020
## seasonNH.14.15 1.9294 3.5393 0.545 0.58585
## seasonNH.15.16 0.2425 1.5790 0.154 0.87798
## seasonNH.16.17 -5.4515 4.0721 -1.339 0.18114
## seasonSH.10 4.6119 3.5179 1.311 0.19034
## seasonSH.11 1.7367 3.5131 0.494 0.62123
## seasonSH.12 -3.0302 5.2846 -0.573 0.56658
## seasonSH.13 -1.0079 3.5518 -0.284 0.77669
## seasonSH.14 8.1922 3.9675 2.065 0.03935 *
## seasonSH.15 1.0612 5.1690 0.205 0.83741
## seasonSH.16 -0.8627 2.5379 -0.340 0.73403
## seasonSH.17 -6.5325 6.6345 -0.985 0.32519
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 122.8775)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 76184 on 620 degrees of freedom
## AIC: 4994.6
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age only, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit1 = glm(hospdays ~ ns(age, knots = 65) + anyvac + anyav + anydx + country + season, data = train.H1N1)
summary(fit1)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + anyvac + anyav +
## anydx + country + season, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -18.026 -5.409 -2.689 1.186 90.596
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1570 3.4560 0.624 0.53277
## ns(age, knots = 65)1 9.0915 3.0852 2.947 0.00333 **
## ns(age, knots = 65)2 3.6643 2.7847 1.316 0.18871
## anyvac1 -2.2952 1.1092 -2.069 0.03894 *
## anyav1 2.0316 1.2121 1.676 0.09421 .
## anydx1 1.4890 1.0911 1.365 0.17287
## countryAustralia -1.8932 2.7184 -0.696 0.48642
## countryBelgium 3.4779 4.6327 0.751 0.45310
## countryDenmark -3.6158 3.1817 -1.136 0.25621
## countryGermany 6.6412 3.3539 1.980 0.04813 *
## countryGreece -2.5039 3.0698 -0.816 0.41502
## countryOther 2.3142 3.0955 0.748 0.45500
## countrySpain -0.4583 3.3667 -0.136 0.89178
## countryThailand -3.6032 2.6702 -1.349 0.17771
## countryUK -1.0510 2.9502 -0.356 0.72177
## countryUSA -0.9344 2.9900 -0.313 0.75476
## seasonNH.10.11 4.5327 1.6688 2.716 0.00679 **
## seasonNH.11.12 -2.0166 4.5005 -0.448 0.65425
## seasonNH.12.13 -5.1089 3.3737 -1.514 0.13045
## seasonNH.13.14 0.8928 1.8294 0.488 0.62571
## seasonNH.14.15 1.1918 3.5591 0.335 0.73783
## seasonNH.15.16 -0.5854 1.5953 -0.367 0.71379
## seasonNH.16.17 -5.6394 4.0486 -1.393 0.16415
## seasonSH.10 5.5224 3.5105 1.573 0.11620
## seasonSH.11 2.1123 3.4972 0.604 0.54606
## seasonSH.12 -1.6833 5.2763 -0.319 0.74981
## seasonSH.13 -0.2649 3.5419 -0.075 0.94040
## seasonSH.14 7.6254 3.9534 1.929 0.05421 .
## seasonSH.15 0.5029 5.1459 0.098 0.92217
## seasonSH.16 -1.2036 2.5256 -0.477 0.63384
## seasonSH.17 -7.2836 6.5999 -1.104 0.27020
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 121.4069)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 75029 on 618 degrees of freedom
## AIC: 4988.7
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit2 = glm(hospdays ~ ns(age, knots = 65) + p.g1.protection + anyvac + anyav + anydx + country + season, data = train.H1N1)
summary(fit2)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + p.g1.protection +
## anyvac + anyav + anydx + country + season, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -17.881 -5.335 -2.786 1.361 90.120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0872 3.4544 0.604 0.54592
## ns(age, knots = 65)1 4.8263 4.4761 1.078 0.28135
## ns(age, knots = 65)2 1.7606 3.1374 0.561 0.57489
## p.g1.protection 2.3671 1.8008 1.314 0.18918
## anyvac1 -2.3115 1.1086 -2.085 0.03747 *
## anyav1 2.0833 1.2120 1.719 0.08612 .
## anydx1 1.4358 1.0913 1.316 0.18875
## countryAustralia -1.9683 2.7174 -0.724 0.46914
## countryBelgium 3.2724 4.6326 0.706 0.48022
## countryDenmark -3.6001 3.1798 -1.132 0.25800
## countryGermany 6.6402 3.3519 1.981 0.04803 *
## countryGreece -2.6718 3.0707 -0.870 0.38459
## countryOther 2.2171 3.0946 0.716 0.47400
## countrySpain -0.3318 3.3661 -0.099 0.92152
## countryThailand -3.5277 2.6693 -1.322 0.18679
## countryUK -1.2312 2.9517 -0.417 0.67674
## countryUSA -1.1291 2.9920 -0.377 0.70601
## seasonNH.10.11 4.5969 1.6685 2.755 0.00604 **
## seasonNH.11.12 -2.3124 4.5035 -0.513 0.60780
## seasonNH.12.13 -4.8080 3.3795 -1.423 0.15533
## seasonNH.13.14 1.1754 1.8410 0.638 0.52341
## seasonNH.14.15 1.4174 3.5611 0.398 0.69076
## seasonNH.15.16 -0.2263 1.6176 -0.140 0.88880
## seasonNH.16.17 -5.7337 4.0469 -1.417 0.15704
## seasonSH.10 5.4131 3.5094 1.542 0.12347
## seasonSH.11 2.3277 3.4989 0.665 0.50614
## seasonSH.12 -1.8593 5.2749 -0.352 0.72459
## seasonSH.13 -0.3346 3.5402 -0.095 0.92472
## seasonSH.14 7.8071 3.9535 1.975 0.04874 *
## seasonSH.15 0.5883 5.1433 0.114 0.90897
## seasonSH.16 -1.0114 2.5283 -0.400 0.68928
## seasonSH.17 -7.2674 6.5960 -1.102 0.27098
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 121.2641)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 74820 on 617 degrees of freedom
## AIC: 4988.8
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age only, plus fixed effects of vaccination, av use, underlying conditions NO COUNTRY AND SEASON
fit3 = glm(hospdays ~ ns(age, knots = 65) + anyvac + anyav + anydx, data = train.H1N1)
summary(fit3)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + anyvac + anyav +
## anydx, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.629 -5.858 -3.551 0.348 90.087
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.340 1.645 1.422 0.15542
## ns(age, knots = 65)1 6.612 2.921 2.263 0.02396 *
## ns(age, knots = 65)2 2.540 2.664 0.954 0.34064
## anyvac1 -2.496 1.050 -2.378 0.01771 *
## anyav1 1.473 1.192 1.235 0.21712
## anydx1 2.777 1.063 2.614 0.00917 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 127.8588)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 82213 on 643 degrees of freedom
## AIC: 4998
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, NO COUNTRY AND SEASON
fit4 = glm(hospdays ~ ns(age, knots = 65) + p.g1.protection + anyvac + anyav + anydx, data = train.H1N1)
summary(fit4)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + p.g1.protection +
## anyvac + anyav + anydx, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.344 -5.924 -3.391 0.312 89.489
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.2786 1.6426 1.387 0.1659
## ns(age, knots = 65)1 1.1296 4.2243 0.267 0.7893
## ns(age, knots = 65)2 0.1362 2.9776 0.046 0.9635
## p.g1.protection 3.1891 1.7777 1.794 0.0733 .
## anyvac1 -2.5473 1.0482 -2.430 0.0154 *
## anyav1 1.5904 1.1920 1.334 0.1826
## anydx1 2.6530 1.0631 2.496 0.0128 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 127.4193)
##
## Null deviance: 84839 on 648 degrees of freedom
## Residual deviance: 81803 on 642 degrees of freedom
## AIC: 4996.8
##
## Number of Fisher Scoring iterations: 2
## Plot the fits for each model side by side
par(mfrow = c(1, 2), las = 3, mar = c(6, 6, 6, 3), cex.axis = .7)
library(gam)
## Loading required package: foreach
## Loaded gam 1.14-4
plot.gam(fit00, se = T, col = 'dodgerblue', main = 'Simplified Baseline')
plot.gam(fit0, se = T, col = 'dodgerblue', main = 'Baseline')
plot.gam(fit1, se = T, col = 'dodgerblue', main = 'Baseline + age + imp')
plot.gam(fit2, se = T, col = 'dodgerblue', main = 'Baseline + age + imp')
plot.gam(fit3, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
plot.gam(fit4, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
pdf('003Hospdays_H1N1.pdf')
plot.gam(fit4, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
dev.off()
## quartz_off_screen
## 2
# Predict probs for each patient
pred00 = predict(fit00, newdata = (test.H1N1))
pred0 = predict(fit0, newdata = (test.H1N1))
pred1 = predict(fit1, newdata = (test.H1N1))
pred2 = predict(fit2, newdata = (test.H1N1))
pred3 = predict(fit3, newdata = (test.H1N1))
pred4 = predict(fit4, newdata = (test.H1N1))
# Estimate the simulated error rate
MSE = numeric(6); names(MSE) = c('simple.baseline', 'baseline', 'baeline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
MSE[1] = mean((pred00 - test.H1N1$hospdays)^2)
MSE[2] = mean((pred0 - test.H1N1$hospdays)^2)
MSE[3] = mean((pred1 - test.H1N1$hospdays)^2)
MSE[4] = mean((pred2 - test.H1N1$hospdays)^2)
MSE[5] = mean((pred3 - test.H1N1$hospdays)^2)
MSE[6] = mean((pred4 - test.H1N1$hospdays)^2)
sort(MSE)
## simple.baseline+age simple.baseline+age+imp simple.baseline
## 171.9364 173.1208 174.8718
## baeline+age baseline+age+imp baseline
## 176.6496 177.7045 181.5595
AIC = c(fit00$aic, fit0$aic, fit1$aic, fit2$aic, fit3$aic, fit4$aic); names(AIC) = c('simple.baseline', 'baseline', 'baeline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
del.AIC = sort(AIC - min(AIC))
del.AIC
## baeline+age baseline+age+imp baseline
## 0.0000000 0.1851009 5.9113672
## simple.baseline+age+imp simple.baseline+age simple.baseline
## 8.0967682 9.3417324 11.0958038
load('master.AIC.table.H1N1.RData')
master.AIC.table['003Hospdays', c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')] = AIC - min(AIC)
save(master.AIC.table, file = 'master.AIC.table.H1N1.RData')
rm(master.AIC.table)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
##
## Argentina Australia Belgium China Denmark Germany Greece
## 97 120 3 7 26 19 24
## Peru Singapore Spain Thailand UK USA
## 9 21 29 120 89 174
## [1] 738
library(splines)
## H3N2 fit to medical history only (no effect of country, season, age or imprinting)
fit00 = glm(hospdays ~ anyvac + anyav + anydx, data = train.H3N2)
summary(fit00)
##
## Call:
## glm(formula = hospdays ~ anyvac + anyav + anydx, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.846 -5.869 -2.869 0.511 139.335
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4656 1.5053 2.967 0.00312 **
## anyvac1 0.2035 0.9361 0.217 0.82797
## anyav1 -0.9769 1.1727 -0.833 0.40511
## anydx1 4.1767 1.2831 3.255 0.00119 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 134.0292)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 88593 on 661 degrees of freedom
## AIC: 5150.4
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to country, season and medical history only (no effect of age or imprinting)
fit0 = glm(hospdays ~ anyvac + anyav + anydx + country + season, data = train.H3N2)
summary(fit0)
##
## Call:
## glm(formula = hospdays ~ anyvac + anyav + anydx + country + season,
## data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -15.286 -5.789 -3.044 1.157 135.644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.87186 4.91067 0.381 0.70319
## anyvac1 0.32796 1.08242 0.303 0.76200
## anyav1 -0.01256 1.25298 -0.010 0.99201
## anydx1 3.96803 1.34685 2.946 0.00334 **
## countryAustralia -1.60759 2.06883 -0.777 0.43742
## countryBelgium -3.26689 8.56617 -0.381 0.70305
## countryDenmark 0.85676 3.47023 0.247 0.80507
## countryGreece 2.45624 3.45284 0.711 0.47712
## countryOther 1.36542 3.12273 0.437 0.66208
## countrySingapore -7.96181 3.19157 -2.495 0.01286 *
## countrySpain -0.16994 3.28776 -0.052 0.95879
## countryThailand -3.78205 2.22425 -1.700 0.08955 .
## countryUK -1.74996 2.61535 -0.669 0.50367
## countryUSA -4.16761 2.54615 -1.637 0.10216
## seasonNH.11.12 2.90061 4.60199 0.630 0.52873
## seasonNH.12.13 4.05637 4.10030 0.989 0.32290
## seasonNH.13.14 2.79077 4.46038 0.626 0.53175
## seasonNH.14.15 5.06027 4.04882 1.250 0.21183
## seasonNH.15.16 2.75674 4.76703 0.578 0.56327
## seasonNH.16.17 2.40125 4.12000 0.583 0.56021
## seasonSH.10 17.03451 7.12019 2.392 0.01703 *
## seasonSH.11 3.80482 5.46660 0.696 0.48667
## seasonSH.12 3.32440 5.00129 0.665 0.50648
## seasonSH.13 2.60350 5.26530 0.494 0.62115
## seasonSH.14 4.78322 4.66593 1.025 0.30569
## seasonSH.15 3.62369 4.50804 0.804 0.42180
## seasonSH.16 5.02466 4.54540 1.105 0.26939
## seasonSH.17 5.52862 4.68474 1.180 0.23839
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 131.7959)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 83954 on 637 degrees of freedom
## AIC: 5162.6
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age only, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit1 = glm(hospdays ~ ns(age, knots = 65) + anyvac + anyav + anydx + country + season, data = train.H3N2)
summary(fit1)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + anyvac + anyav +
## anydx + country + season, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -14.529 -5.682 -2.864 1.157 135.440
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.54929 5.00216 0.110 0.9126
## ns(age, knots = 65)1 4.43773 3.49309 1.270 0.2044
## ns(age, knots = 65)2 2.58926 1.91507 1.352 0.1768
## anyvac1 -0.01318 1.09988 -0.012 0.9904
## anyav1 -0.02329 1.25255 -0.019 0.9852
## anydx1 3.08884 1.45323 2.126 0.0339 *
## countryAustralia -1.31527 2.07403 -0.634 0.5262
## countryBelgium -3.19438 8.55691 -0.373 0.7090
## countryDenmark 1.20075 3.47199 0.346 0.7296
## countryGreece 2.48421 3.44904 0.720 0.4716
## countryOther 1.47824 3.11986 0.474 0.6358
## countrySingapore -7.53125 3.21253 -2.344 0.0194 *
## countrySpain -0.04603 3.28565 -0.014 0.9888
## countryThailand -3.65539 2.22341 -1.644 0.1007
## countryUK -1.14258 2.63348 -0.434 0.6645
## countryUSA -3.43398 2.57471 -1.334 0.1828
## seasonNH.11.12 2.67543 4.60754 0.581 0.5617
## seasonNH.12.13 3.74415 4.11023 0.911 0.3627
## seasonNH.13.14 2.58994 4.46203 0.580 0.5618
## seasonNH.14.15 4.87533 4.05510 1.202 0.2297
## seasonNH.15.16 2.47911 4.76432 0.520 0.6030
## seasonNH.16.17 2.25768 4.12439 0.547 0.5843
## seasonSH.10 16.16262 7.12952 2.267 0.0237 *
## seasonSH.11 4.18134 5.46665 0.765 0.4446
## seasonSH.12 3.02691 5.00638 0.605 0.5457
## seasonSH.13 2.30984 5.27844 0.438 0.6618
## seasonSH.14 4.84586 4.66267 1.039 0.2991
## seasonSH.15 3.53934 4.50396 0.786 0.4323
## seasonSH.16 4.80866 4.54368 1.058 0.2903
## seasonSH.17 5.52250 4.68314 1.179 0.2387
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 131.5035)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 83505 on 635 degrees of freedom
## AIC: 5163
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit2 = glm(hospdays ~ ns(age, knots = 65) + p.g2.protection + anyvac + anyav + anydx + country + season, data = train.H3N2)
summary(fit2)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + p.g2.protection +
## anyvac + anyav + anydx + country + season, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -14.429 -5.650 -2.712 1.280 135.283
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.23457 5.52487 0.585 0.5584
## ns(age, knots = 65)1 -0.35194 5.45352 -0.065 0.9486
## ns(age, knots = 65)2 1.07055 2.33017 0.459 0.6461
## p.g2.protection -2.81326 2.46026 -1.143 0.2533
## anyvac1 -0.05271 1.10015 -0.048 0.9618
## anyav1 -0.15717 1.25771 -0.125 0.9006
## anydx1 3.15385 1.45399 2.169 0.0304 *
## countryAustralia -1.29043 2.07364 -0.622 0.5340
## countryBelgium -2.47299 8.57807 -0.288 0.7732
## countryDenmark 1.18684 3.47118 0.342 0.7325
## countryGreece 2.36692 3.44973 0.686 0.4929
## countryOther 1.24362 3.12584 0.398 0.6909
## countrySingapore -7.74596 3.21723 -2.408 0.0163 *
## countrySpain -0.17161 3.28669 -0.052 0.9584
## countryThailand -3.76004 2.22475 -1.690 0.0915 .
## countryUK -1.20828 2.63347 -0.459 0.6465
## countryUSA -3.43951 2.57409 -1.336 0.1820
## seasonNH.11.12 2.79283 4.60756 0.606 0.5446
## seasonNH.12.13 3.92670 4.11234 0.955 0.3400
## seasonNH.13.14 2.83014 4.46589 0.634 0.5265
## seasonNH.14.15 5.17402 4.06252 1.274 0.2033
## seasonNH.15.16 2.69096 4.76677 0.565 0.5726
## seasonNH.16.17 2.60754 4.13473 0.631 0.5285
## seasonSH.10 16.56039 7.13628 2.321 0.0206 *
## seasonSH.11 4.66232 5.48149 0.851 0.3953
## seasonSH.12 2.95647 5.00555 0.591 0.5550
## seasonSH.13 2.32712 5.27718 0.441 0.6594
## seasonSH.14 4.82949 4.66156 1.036 0.3006
## seasonSH.15 3.57223 4.50297 0.793 0.4279
## seasonSH.16 4.99747 4.54558 1.099 0.2720
## seasonSH.17 5.72480 4.68535 1.222 0.2222
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 131.4398)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 83333 on 634 degrees of freedom
## AIC: 5163.7
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age only, plus fixed effects of vaccination, av use, underlying conditions NO COUNTRY AND SEASON
fit3 = glm(hospdays ~ ns(age, knots = 65) + anyvac + anyav + anydx, data = train.H3N2)
summary(fit3)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + anyvac + anyav +
## anydx, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.946 -5.757 -2.958 0.677 139.007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.18653 1.87100 1.703 0.0890 .
## ns(age, knots = 65)1 4.78461 3.43792 1.392 0.1645
## ns(age, knots = 65)2 4.20223 1.83462 2.291 0.0223 *
## anyvac1 -0.04308 0.93732 -0.046 0.9634
## anyav1 -0.95766 1.16866 -0.819 0.4128
## anydx1 3.20425 1.38096 2.320 0.0206 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 132.976)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 87631 on 659 degrees of freedom
## AIC: 5147.1
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, NO COUNTRY AND SEASON
fit4 = glm(hospdays ~ ns(age, knots = 65) + p.g2.protection + anyvac + anyav + anydx, data = train.H3N2)
summary(fit4)
##
## Call:
## glm(formula = hospdays ~ ns(age, knots = 65) + p.g2.protection +
## anyvac + anyav + anydx, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.889 -5.759 -3.029 0.867 138.921
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.48083 3.02042 1.815 0.0700 .
## ns(age, knots = 65)1 0.81658 5.35135 0.153 0.8788
## ns(age, knots = 65)2 2.90823 2.27036 1.281 0.2007
## p.g2.protection -2.31650 2.39401 -0.968 0.3336
## anyvac1 -0.04439 0.93737 -0.047 0.9622
## anyav1 -1.03393 1.17137 -0.883 0.3777
## anydx1 3.27513 1.38297 2.368 0.0182 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 132.9888)
##
## Null deviance: 90273 on 664 degrees of freedom
## Residual deviance: 87507 on 658 degrees of freedom
## AIC: 5148.2
##
## Number of Fisher Scoring iterations: 2
## Plot the fits for each model side by side
par(mfrow = c(1, 2), las = 3, mar = c(6, 6, 6, 3), cex.axis = .7)
library(gam)
plot.gam(fit00, se = T, col = 'firebrick1', main = 'Simplified Baseline')
plot.gam(fit0, se = T, col = 'firebrick1', main = 'Baseline')
plot.gam(fit1, se = T, col = 'firebrick1', main = 'Baseline + age + imp')
plot.gam(fit2, se = T, col = 'firebrick1', main = 'Baseline + age + imp')
plot.gam(fit3, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
plot.gam(fit4, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
pdf('003Hospdays_H3N2.pdf')
plot.gam(fit4, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
dev.off()
## quartz_off_screen
## 2
# Predict probs for each patient
pred00 = predict(fit00, newdata = test.H3N2)
pred0 = predict(fit0, newdata = test.H3N2)
pred1 = predict(fit1, newdata = test.H3N2)
pred2 = predict(fit2, newdata = test.H3N2)
pred3 = predict(fit3, newdata = test.H3N2)
pred4 = predict(fit4, newdata = test.H3N2)
# Estimate the simulated error rate
MSE = numeric(6); names(MSE) = c('simple.baseline', 'baseline', 'baeline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
MSE[1] = mean((pred00 - test.H3N2$hospdays)^2)
MSE[2] = mean((pred0 - test.H3N2$hospdays)^2)
MSE[3] = mean((pred1 - test.H3N2$hospdays)^2)
MSE[4] = mean((pred2 - test.H3N2$hospdays)^2)
MSE[5] = mean((pred3 - test.H3N2$hospdays)^2)
MSE[6] = mean((pred4 - test.H3N2$hospdays)^2)
sort(MSE)
## baeline+age baseline+age+imp baseline
## 107.1434 107.5259 107.5507
## simple.baseline+age simple.baseline simple.baseline+age+imp
## 109.5733 109.9962 109.9967
AIC = c(fit00$aic, fit0$aic, fit1$aic, fit2$aic, fit3$aic, fit4$aic); names(AIC) = c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
del.AIC = sort(AIC - min(AIC))
del.AIC
## simple.baseline+age simple.baseline+age+imp simple.baseline
## 0.000000 1.054417 3.261739
## baseline baseline+age baseline+age+imp
## 15.492766 15.924631 16.554570
load('master.AIC.table.H3N2.RData')
master.AIC.table['003Hospdays', c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')] = AIC - min(AIC)
save(master.AIC.table, file = 'master.AIC.table.H3N2.RData')
rm(master.AIC.table)